# coding:utf-8
import pandas as pd
import numpy as np



def filter_user_actions(user_table, item_table,
                        excluded_days=('2014-12-11', '2014-12-12')):
    """Return the user rows restricted to known items, minus excluded days.

    The result keeps only rows whose ``item_id`` occurs in
    ``item_table['item_id']``, gains two string columns derived from
    ``time`` (``days`` = text before the first space, ``hours`` = text
    after it), and drops every row whose ``days`` value is listed in
    ``excluded_days``.

    Args:
        user_table: DataFrame with at least ``item_id`` and ``time``
            columns; ``time`` holds space-separated strings.
        item_table: DataFrame with an ``item_id`` column.
        excluded_days: Iterable of ``days`` strings to remove. Defaults to
            the two dates the original script dropped.

    Returns:
        A new DataFrame; ``user_table`` itself is not modified.
    """
    in_item_set = user_table['item_id'].isin(item_table['item_id'])
    # Copy so the column assignments below write to an independent frame
    # instead of a slice of the input (avoids SettingWithCopyWarning).
    kept = user_table[in_item_set].copy()

    # Split once and reuse the pieces for both derived columns.
    time_parts = kept['time'].str.split(' ')
    kept['days'] = time_parts.str[0]
    kept['hours'] = time_parts.str[1]

    return kept[~kept['days'].isin(list(excluded_days))]


if __name__ == '__main__':
    # NOTE(review): main_dir is an absolute, machine-specific Windows path;
    # the full filtered table is written there, everything else uses sub_dir.
    main_dir = "C:\\Users\\谷雪松\\Documents\\gxs\\datatsets\\fresh_comp_offline\\"
    sub_dir = "../DataSet/"

    user_table = pd.read_csv(sub_dir + 'tianchi_fresh_comp_train_user.csv')
    item_table = pd.read_csv(sub_dir + 'tianchi_fresh_comp_train_item.csv')

    user_table = filter_user_actions(user_table, item_table)
    user_table.to_csv(main_dir + 'drop1112_sub_item.csv', index=False)

    # Also write the first 3000 rows of each table as smaller files.
    user_table.head(3000).to_csv(sub_dir + 'user_table_sub_item.csv', index=False)
    item_table.head(3000).to_csv(sub_dir + 'item_table_sub_item.csv', index=False)

